---
title: "This R code is for the non-linear programming simulation"
author: "Arthur Zeyang Yu"
date: 'June 5, 2023'
output: html_document
---

```{r setup, include = TRUE}
knitr::opts_chunk$set(echo = TRUE)

# clear memory
rm(list = ls())

# use packages
library("ggplot2")
library("xlsx") # package for write results in xlsx

# set directories
data_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/data"
results_path <- "C:/Users/arthu/Dropbox/research/mte_at/codes/replication/results"
```

# preparation: compute useful quantities used in the paper
## replicate Table D1: Relevant Quantities in Simulation Set-up
## replicate Table D2: Summary of Identifiable Quantities
```{r}
# def: MTR pairs (reverse the MTR pairs in the MT paper)
m0_u <- function(x) {
  y <- 0.35 - 0.3 * x - 0.05 * x^2
  return(y)
}

m1_u <- function(x) {
  y <- 0.9 - 1.1 * x + 0.3 * x^2
  return(y)
}

# def: MTE function (reverse the MTE in the MT paper)
mte <- function(x){
  # y <- 0.55 - 0.8*x + 0.35*(x^2)
  y <- m1_u(x) - m0_u(x)
  return(y)
}

# def: unobserved heterogeneity in outcome process
e_u0_u <- function(x){
  y <- 0.167 - 0.3 * x - 0.05 * x^2
  return(y)
}

e_u1_u <- function(x){
  y <- 0.45 - 1.1 * x + 0.3 * x^2
  return(y)
}

# def: propensity scores
p0 <- 0.2 # P[D = 1 | Z = 0]
p1 <- 0.5 # P[D = 1 | Z = 1]

# def: proportion of compliance types
p_complier <- 0.3 # proportion of complier
p_at <- 0.2 # proportion of always-taker
p_nt <- 0.5 # proportion of never-taker

# compute: E[Z] and Var(Z) where Z: binary instrument
e_z <- 0.5
var_z <- e_z * (1 - e_z)

# compute: E[D] (P[D = 1]) and Var(D) where D: binary treatment
e_d <- (p_at * (1 - e_z)) + ( (p_complier + p_at) * e_z)
var_d <- e_d * (1 - e_d)

# compute P[D = 0]
p_d_0 <- 1 - e_d

# def: P[D | Z]
p_d0z1 <- p_nt
p_d0z0 <- (1 - e_d - (p_d0z1 * e_z))/(1 - e_z)
p_d1z0 <- p_at
p_d1z1 <- p_complier + p_d1z0

# compute: E[Y(d) | D = s], where s, d \in \{0, 1\}
int_1_temp <- integrate(m1_u, p1, 1)$value
int_2_temp <- integrate(m1_u, p0, 1)$value
e_y1_d0_1 <- (1/(1 - p1)) * int_1_temp * (p_d0z1 * e_z)/(1 - e_d)
e_y1_d0_2 <-(1/(1 - p0)) * int_2_temp * (p_d0z0 * (1 - e_z))/(1 - e_d)
e_y1_d0 <- e_y1_d0_1  + e_y1_d0_2 # E[Y(1) | D = 0]
e_y1_d0

int_1_temp <- integrate(m0_u, p1, 1)$value
int_2_temp <- integrate(m0_u, p0, 1)$value
e_y0_d0_1 <- (1/(1 - p1)) * int_1_temp * (p_d0z1 * e_z)/(1 - e_d)
e_y0_d0_2 <-(1/(1 - p0)) * int_2_temp * (p_d0z0 * (1 - e_z))/(1 - e_d)
e_y0_d0 <- e_y0_d0_1  + e_y0_d0_2 # E[Y(0) | D = 0]
e_y0_d0

int_1_temp <- integrate(m1_u, 0, p1)$value
int_2_temp <- integrate(m1_u, 0, p0)$value
e_y1_d1_1 <- (1/p1) * int_1_temp * (p_d1z1 * e_z)/(e_d)
e_y1_d1_2 <-(1/p0) * int_2_temp * (p_d1z0 * (1 - e_z))/(e_d)
e_y1_d1 <- e_y1_d1_1  + e_y1_d1_2 # E[Y(1) | D = 1]
e_y1_d1

int_1_temp <- integrate(m0_u, 0, p1)$value
int_2_temp <- integrate(m0_u, 0, p0)$value
e_y0_d1_1 <- (1/p1) * int_1_temp * (p_d1z1 * e_z)/(e_d)
e_y0_d1_2 <-(1/p0) * int_2_temp * (p_d1z0 * (1 - e_z))/(e_d)
e_y0_d1 <- e_y0_d1_1  + e_y0_d1_2 # E[Y(0) | D = 1]
e_y0_d1

# compute: expectation of potential outcomes
e_y1 <- e_y1_d1 * e_d + e_y1_d0 * (1 - e_d)
e_y1 <- integrate(m1_u, 0, 1)$value
e_y0 <- e_y0_d1 * e_d + e_y0_d0 * (1 - e_d)
e_y0 <- integrate(m0_u, 0, 1)$value

# compute: b in the STR and SMTR assumption
e_y1_d0 - e_y0_d0
e_y1_d1 - e_y0_d1
b_str <- 0.5

# compute: cov(D, Z)
cov_dz <- p_complier * var_z # cov(D, Z)/var(Z) = proportion of complier
cov_dz

# compute: E[Y | Z = 0]
e_yz00 <- (1 / (1 - p0)) * 0.8 * integrate(m0_u, p0, 1)$value
e_yz01 <- (1 / p0) * 0.2 * integrate(m1_u, 0, p0)$value
e_yz0 <- e_yz00 + e_yz01

# compute: E[Y | Z = 1]
e_yz10 <- (1 / (1 - p1)) * 0.5 * integrate(m0_u, p1, 1)$value
e_yz11 <- (1 / p1) * 0.5 * integrate(m1_u, 0, p1)$value
e_yz1 <- e_yz10 + e_yz11

# compute Wald estimand
beta_iv_wald <- (e_yz1 - e_yz0)/p_complier

# compute: E[Y | Z = 0, D = 1]
ey_z0d1 <- (1 / p0) * integrate(m1_u, 0, p0)$value

# compute: E[Y | Z = 1, D = 0]
ey_z1d0 <- (1 / (1 - p1)) * integrate(m0_u, p1, 1)$value

# compute: E[Y(1)] and E[Y(0)]
e_y0 <- integrate(m0_u, 0, 1)$value
e_y1 <- integrate(m1_u, 0, 1)$value

# compute: E[Y(1) | P(Z) = p, D = 1], Z = 0
ey1_p0d1 <- e_y1 + (1 / p0) * integrate(e_u1_u, 0, p0)$value

# compute: E[Y(1) | P(Z) = p, D = 1], Z = 1
ey1_p1d1 <- e_y1 + (1 / p1) * integrate(e_u1_u, 0, p1)$value

# compute: E[Y(0) | P(Z) = p, D = 0], Z = 0
ey0_p0d0 <- e_y0 + (1 / (1 - p0)) * integrate(e_u0_u, p0, 1)$value

# compute: E[Y(0) | P(Z) = p, D = 0], Z = 1
ey0_p1d0 <- e_y0 + (1 / (1 - p1)) * integrate(e_u0_u, p1, 1)$value

# compute: E[Y]
e_y <- e_yz0 * e_z + e_yz1 * e_z
e_y

# compute: E[YZ] = E[ Y | Z = 1] P[Z = 1]
e_yz <- e_yz1 * e_z
e_yz

# compute: cov(Y, Z)
cov_yz <- e_yz - (e_y * e_z)
cov_yz

# compute: beta_{IV}
beta_iv <- cov_yz/cov_dz
beta_iv

# compute: E[1[D = d, Z = z] Y] with d, z \in {0, 1}
e_d1z1 <- e_z * integrate(m1_u, 0, p1)[[1]]
e_d1z0 <- (1 - e_z) * integrate(m1_u, 0, p0)[[1]]
e_d0z1 <- e_z * integrate(m0_u, p1, 1)[[1]]
e_d0z0 <- (1 - e_z) * integrate(m0_u, p0, 1)[[1]]
e_d1z1 + e_d1z0 + e_d0z1 + e_d0z0 # verify with e_y
e_y

# compute: true value of ATE of AT
at_y0_true <- integrate(m0_u, 0, p0)[[1]] * (1/p0)
at_y0_true
at_y1_true <- integrate(m1_u, 0, p0)[[1]] * (1/p0)
at_y1_true
ate_at_true <- integrate(mte, 0, p0)[[1]] * (1/p0)
ate_at_true

# compute: true value of ATE of NT
nt_y0_true <- integrate(m0_u, p1, 1)[[1]] * (1/(1 - p1))
nt_y0_true
nt_y1_true <- integrate(m1_u, p1, 1)[[1]] * (1/(1 - p1))
nt_y1_true
ate_nt_true <- integrate(mte, p1, 1)[[1]] * (1/(1 - p1))
ate_nt_true

# compute: true value of ATE of compliers
complier_y0_true <- integrate(m0_u, p0, p1)[[1]] * (1/(p1 - p0))
complier_y0_true
complier_y1_true <- integrate(m1_u, p0, p1)[[1]] * (1/(p1 - p0))
complier_y1_true
late <- integrate(mte, p0, p1)[[1]] * (1/(p1 - p0))
late
```

# plot MTR pairs
## replicate Figure 3: Plot of Marginal Treatment Response (MTR) Pairs
```{r}
# use ggplot to generate plot of MTR pairs
mtr_pair_plot <- ggplot(data.frame(x = c(0, 1)), aes(x)) +
  stat_function(fun = function(x) m0_u(x),
                aes(colour = "MTR of Y(0)")) +
  stat_function(fun = function(x) m1_u(x),
                aes(colour = "MTR of Y(1)")) +
  scale_colour_manual("",
                      values = c("black", "red")) +
  xlab("Unobserved Heterogeneity in Treatment Choice (U)") +
  ylab("Expectation of Potential Outcomes")
ggsave(paste0(results_path, "/figure3.pdf"))
```

# compute: point identification based on Brinch et al. (2017)
## replicate Section D.4.1 Point Identification Based on Parametric Assumptions on MTE functions
```{r}
# compute: \mu_{0} and \alpha_{0} by solving system of equations
A0 <- matrix(c(1, 1, (p0/2), (p1/2)), 2, 2)
b0 <- c(ey0_p0d0, ey0_p1d0)
mu_0 <- solve(A0, b0)[1]
alpha_0 <- solve(A0, b0)[2]

# compute: \mu_{1} and \alpha_{1} by solving system of equations
A1 <- matrix(c(1, 1, ((p0 - 1)/2), ((p1 - 1)/2)), 2, 2)
b1 <- c(ey1_p0d1, ey1_p1d1)
mu_1 <- solve(A1, b1)[1]
alpha_1 <- solve(A1, b1)[2]

# define: approximated MTR pairs based on linearity assumption
m0_u_l <- function(x) {
  y <- mu_0 + alpha_0 * x - 0.5 * alpha_0
  return(y)
}

m1_u_l <- function(x) {
  y <- mu_1 + alpha_1 * x - 0.5 * alpha_1
  return(y)
}

# compute: ATE of AT
ate_at_lin_mte <- ey_z0d1 - (1/p0) * integrate(m0_u_l, 0, p0)$value
ate_at_lin_mte

# compute: ATE of NT
ate_nt_lin_mte <- (1/(1 - p1)) * integrate(m1_u_l, p1, 1)$value - ey_z1d0
ate_nt_lin_mte
```

# plot MTR and approximated pairs
## replicate Figure 4: Plot of MTR and Approximated MTR Pairs
```{r}
# use ggplot to generate plot of MTR and approximated MTR pairs
mtr_appr_mtr_plot <- ggplot(data.frame(x = c(0, 1)), aes(x)) +
  stat_function(fun = function(x) m0_u(x),
                aes(colour = "MTR of Y(0)")) +
  stat_function(fun = function(x) m1_u(x),
                aes(colour = "MTR of Y(1)")) +
  stat_function(fun = function(x) m0_u_l(x),
                linetype="dashed",
                aes(colour = "Approximated MTR of Y(0)")) +
  stat_function(fun = function(x) m1_u_l(x),
                linetype="dashed",
                aes(colour = "Approximated MTR of Y(1)")) +
  scale_colour_manual("",
                      values = c("black", "red", "black", "red"),
                      guide = guide_legend(override.aes = list(
                        linetype = c("dashed", "dashed", "solid", "solid")))) +
  xlab("Unobserved Heterogeneity in Treatment Choice (U)") +
  ylab("Expectation of Potential Outcomes")
ggsave(paste0(results_path, "/figure4.pdf"))
```

# compute: Manski bound
## replicate Section D.4.3 Manski Bounds
```{r}
# ATE of always-takers: Manski bound
bd_at_manski_l <- ey_z0d1 - 1
bd_at_manski_u <- ey_z0d1 - 0
bd_at_manski_l
bd_at_manski_u

# ATE of never-takers: Manski bound
bd_nt_manski_l <- 0 - ey_z1d0
bd_nt_manski_u <- 1 - ey_z1d0
bd_nt_manski_l
bd_nt_manski_u
```

# compute: bound based on monotone treatment response
## replicate Section D.4.4 Bounds of Monotone Treatment Response
```{r}
# trivial, we skip the code
```

# compute: bound based on STR/SMTR assumption
## replicate Table D5: Sensitivity Analysis of b in STR and SMTR Assumptions
```{r}
# compute: STR bound of ATE of AT
delta_at <- (p1 - p0) * complier_y0_true + (1 - p1) * nt_y0_true
bd_at_str_l <- at_y1_true - (1/p0) * (e_y + b_str * e_d - delta_at)
bd_at_str_u <- at_y1_true - (1/p0) * (e_y - b_str * e_d - delta_at)
bd_at_str_l
bd_at_str_u

# compute: STR bound of ATE of NT
delta_nt <- (p1 - p0) * complier_y1_true + p0 * at_y1_true
## rmk: E[|D - 1|] = P[D = 0]
bd_nt_str_l <- (1/(1 - p1)) * (e_y - b_str * p_d_0 - delta_nt) - nt_y0_true
bd_nt_str_u <- (1/(1 - p1)) * (e_y + b_str * p_d_0 - delta_nt) - nt_y0_true
bd_nt_str_l
bd_nt_str_u

# compute: SMTR bound of ATE of AT
bd_at_smtr_l <- at_y1_true - (1/p0) * (e_y - delta_at)
bd_at_smtr_u <- at_y1_true - (1/p0) * (e_y - (b_str * e_d) - delta_at)
bd_at_smtr_l
bd_at_smtr_u

# compute: SMTR bound of ATE of NT
bd_nt_smtr_l <- (1/(1 - p1)) * (e_y - delta_nt) - nt_y0_true
bd_nt_smtr_u <- (1/(1 - p1)) * (e_y + b_str * p_d_0 - delta_nt) - nt_y0_true
bd_nt_smtr_l
bd_nt_smtr_u

# compute: sensitivity analysis of b
b_seq <- c(0.2, 0.25, 0.3, 0.41, 0.45, 0.5) # sequence of different b
sens_b_str <- matrix(NA, nrow = 6, ncol = 5) # results matrix for STR
sens_b_smtr <- matrix(NA, nrow = 6, ncol = 5) # results matrix for SMTR

## compute bounds of STR: loop over different b
for(i in seq(1, 6)){

  # write b in b and write b in mat
  b <- b_seq[i]
  sens_b_str[i, 1] <- b_seq[i]
  
  # bounds for AT
  sens_b_str[i, 2] <- at_y1_true - (1/p0) * (e_y + b * e_d - delta_at)
  sens_b_str[i, 3] <- at_y1_true - (1/p0) * (e_y - b * e_d - delta_at)
  
  # bounds for NT
  sens_b_str[i, 4] <- (1/(1 - p1)) * (e_y - b * p_d_0 - delta_nt) - nt_y0_true
  sens_b_str[i, 5] <- (1/(1 - p1)) * (e_y + b * p_d_0 - delta_nt) - nt_y0_true
  
}

## compute bounds of SMTR: loop over different b
for(i in seq(1, 6)){
  
  # write b in b and write b in mat
  b <- b_seq[i]
  sens_b_smtr[i, 1] <- b_seq[i]
  
  # bounds for AT
  sens_b_smtr[i, 2] <- at_y1_true - (1/p0) * (e_y - delta_at)
  sens_b_smtr[i, 3] <- at_y1_true - (1/p0) * (e_y - (b * e_d) - delta_at)
  
  # bounds for NT
  sens_b_smtr[i, 4] <- (1/(1 - p1)) * (e_y - delta_nt) - nt_y0_true
  sens_b_smtr[i, 5] <- (1/(1 - p1)) * (e_y + b * p_d_0 - delta_nt) - nt_y0_true
  
}

# view sensitivity analysis results
write.xlsx(sens_b_str, 
           file = paste0(results_path, "/table_d5_a.xlsx"), 
           sheetName = "Sheet1")

write.xlsx(sens_b_smtr, 
           file = paste0(results_path, "/table_d5_b.xlsx"), 
           sheetName = "Sheet1")
```
